RNN for pictures genaration

This notebook is an experiment. It generate a picture pixel by pixel using an RNN.
WIP

Import needed for Jupiter


In [1]:
%matplotlib notebook
import matplotlib
import matplotlib.pyplot as plt

from IPython.display import Image

Import needed for the code


In [2]:
import numpy as np
import tensorflow as tf

import fnmatch, os
import time
import urllib

from PIL import Image as Img

Helpers functions

to save a picture


In [3]:
#need to be called within a session
def write_png(tensor, name):
    casted_to_uint8 = tf.cast(tensor, tf.uint8)
    converted_to_png = tf.image.encode_png(casted_to_uint8)
    f = open(name, "wb+")
    f.write(converted_to_png.eval())
    f.close()

A class to define all args


In [15]:
class Args():
    def __init__(self):
        '''directory to store checkpointed models'''
        self.save_dir = 'save_face_training_0.5_128'
        
        '''Picture size'''
        self.picture_size = 64
    
        self.vocab_size = 16
    
        '''size of RNN hidden state'''
        self.rnn_size = 300

        '''RNN sequence length'''
        self.seq_length = self.picture_size*4
        '''number of epochs'''
        self.num_epochs = 5
        '''save frequency'''
        self.save_every = 50 # was 500
        '''Print frequency'''
        self.print_every = 10
        '''clip gradients at this value'''
        self.grad_clip = 5.
        '''learning rate'''
        self.learning_rate = 0.002 # was 0.002
        '''decay rate for rmsprop'''
        self.decay_rate = 0.98
        """continue training from saved model at this path.
        Path must contain files saved by previous training process: """
        self.init_from = 'save_face_training_0.5_128'
        #self.init_from = None

In [5]:
class FaceLoader:
    def __init__(self):
        self.url = "https://upload.wikimedia.org/wikipedia/commons/a/a1/Alan_Turing_Aged_16.jpg"
        self.filename = "turing.jpg"
        self.vocab_size = 16
    
    def load_data(self, image_size):
        self.size = image_size
        urllib.urlretrieve (self.url, self.filename) 
        im = Img.open(self.filename)
        width, height = im.size
        print "loaded", im
        im = im.crop([0,0,width,width])
        im = im.resize([image_size, image_size])
        im = im.convert("L")
        image = np.array(im)
        # convert to float32 and scale
        face = tf.cast(image, tf.float32)/255.0
        self.picture = tf.constant(face.eval())
        im = Img.fromarray((face.eval()* 255).astype(np.uint8))
        im.save("training.jpeg")
                  
    def create_batch(self, batch_size, num_batches):
        xdata = tf.squeeze(self.picture).eval()
        xdata = np.reshape(xdata, [batch_size*num_batches, 1])
        ydata = np.copy(xdata)
        ydata[:-1] = xdata[1:]
        ydata[-1] = xdata[0]
        self.x_batches = np.split(xdata, num_batches, axis=0)
        self.y_batches = np.split(ydata, num_batches, axis=0)
        self.pointer = 0
        
    def vectorize(self, x):
        vectorized = np.zeros([len(x), self.vocab_size])
        for i in range(0, len(x)):
            vectorized[i][int(x[i]*self.vocab_size)] = 1
        return vectorized
        
    def next_batch(self):
        x, y = self.x_batches[self.pointer], self.y_batches[self.pointer]
        self.pointer += 1
        return self.vectorize(x), self.vectorize(y)

    def reset_batch_pointer(self):
        self.pointer = 0

Check!


In [11]:
tf.reset_default_graph()
args = Args()
seq_length = args.seq_length
seq_count = (args.picture_size*args.picture_size)/args.seq_length
print seq_length, seq_count, seq_length* seq_count, args.picture_size*args.picture_size

faceloader = FaceLoader()
with tf.Session() as sess:
    tf.initialize_all_variables().run()
    faceloader.load_data(args.picture_size)
    faceloader.create_batch(seq_length, seq_count)
    sequence = []
    for i in range(seq_count): 
        x, y = faceloader.next_batch();
        for value in x:
            grey = np.argmax(value)
            sequence.append(grey)
    pict = np.reshape(sequence,[args.picture_size, args.picture_size])      
    plt.figure(figsize=(1,1))
    plt.figimage(pict)
    plt.show()


256 16 4096 4096
loaded <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=707x919 at 0x114071F10>

In [12]:
Image("training.jpeg")


Out[12]:

In [13]:
class Model():
    def __init__(self, args, infer=False):
        self.args = args
        if infer:
            '''Infer is true when the model is used for sampling'''
            args.seq_length = 1
   
        hidden_size = args.rnn_size
        vector_size = args.vocab_size # 2 for B & W
        
        # define place holder to for the input data and the target.
        self.input_data = tf.placeholder(tf.float32, [ args.seq_length, vector_size], name='input_data')
        self.target_data = tf.placeholder(tf.float32, [ args.seq_length, vector_size], name='target_data') 
        # define the input xs
        xs = tf.split(0, args.seq_length, self.input_data)
        # define the target
        targets = tf.split(0, args.seq_length, self.target_data)  
        #initial_state
        self.initial_state = tf.zeros((hidden_size,1))
        
        # model parameters
        Wxh = tf.Variable(tf.random_uniform((hidden_size, vector_size))*0.01, name='Wxh') # input to hidden
        Whh = tf.Variable(tf.random_uniform((hidden_size, hidden_size))*0.01, name='Whh') # hidden to hidden
        Why = tf.Variable(tf.random_uniform((vector_size, hidden_size))*0.01, name='Why') # hidden to output
        bh = tf.Variable(tf.zeros((hidden_size, 1)), name='bh') # hidden bias
        by = tf.Variable(tf.zeros((vector_size, 1)), name='by') # output bias
        loss = tf.zeros([1], name='loss')
        hs, ys, ps = {}, {}, {}
        
        hs[-1] = self.initial_state
        # forward pass                                                                                                                                                                              
        for t in xrange(args.seq_length):
            xs_t = tf.transpose(xs[t])
            #if infer and t>0: # to goes in generative mode
            #    xs_t = ps[t-1] # use the previous prediction instead of of the training data
           
            targets_t = tf.transpose(targets[t])
            
            hs[t] = tf.tanh(tf.matmul(Wxh, xs_t) + tf.matmul(Whh, hs[t-1]) + bh) # hidden state
            ys[t] = tf.matmul(Why, hs[t]) + by # unnormalized log probabilities for next pixel
            ps[t] = tf.exp(ys[t]) / tf.reduce_sum(tf.exp(ys[t])) # probabilities for next Pixel
            
            loss += -tf.log(tf.reduce_sum(tf.mul(ps[t], targets_t))) # softmax (cross-entropy loss)
            r_max =  tf.reduce_max(ps[t])
            #ps[t] = tf.tanh(10.*(ps[t]-r_max*0.99))/2 + 0.5 
            
        self.probs = tf.pack([ps[key] for key in ps])
        self.cost = loss / args.seq_length
        self.final_state = hs[args.seq_length-1]
        self.lr = tf.Variable(0.0, trainable=False, name='learning_rate')
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
    
    def inspect(self, draw=False):
        for var in tf.all_variables():
            if var in tf.trainable_variables():
                print ('t', var.name, var.eval().shape)
                if draw:
                    plt.figure(figsize=(1,1))
                    plt.figimage(var.eval())
                    plt.show()
            else:
                print ('nt', var.name, var.eval().shape)

Training


In [14]:
tf.reset_default_graph()
args = Args()
model = Model(args)
print ("model created")
faceloader = FaceLoader()

cost_optimisation = []

seq_length = args.seq_length
seq_count = (args.picture_size*args.picture_size)/args.seq_length


with tf.Session() as sess:
    tf.initialize_all_variables().run()
    print ("variable initialized")
    saver = tf.train.Saver(tf.all_variables())

    # restore model
    if args.init_from is not None:
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"
        saver.restore(sess, ckpt.model_checkpoint_path)
        cost_optimisation = np.load('cost.npy').tolist()
        print ("model restored")
    for e in range(args.num_epochs):
        sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
        #state = model.initial_state.eval()
        faceloader.load_data(args.picture_size)
        faceloader.create_batch(seq_length, seq_count)
        for b in range(1000):
            start = time.time()
            # Get learning data
            state = model.initial_state.eval()
            faceloader.reset_batch_pointer();
            for l in range(seq_count):
                x, y = faceloader.next_batch()
                # Create the structure for the learning data
                feed = {model.input_data: x, model.target_data: y, model.initial_state: state}
                # Run a session using train_op
                [train_loss], state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
            end = time.time()
            if (e + b) % args.print_every == 0:
                cost_optimisation.append(train_loss)
                print("{}/{} (epoch {}), train_loss = {:.6f}, time/batch = {:.3f}" \
                    .format(e + b,
                            args.num_epochs,
                            e, train_loss, end - start))
            if (e + b) % args.save_every == 0:
                checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step = e + b)
                print("model saved to {}".format(checkpoint_path))
                np.save('cost', cost_optimisation)


model created
variable initialized
loaded <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=707x919 at 0x12012FA90>
0/1 (epoch 0), train_loss = 2.311700, time/batch = 13.634
model saved to save_face_training_0.5_128/model.ckpt
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-14-12125d31d1f2> in <module>()
     39                 feed = {model.input_data: x, model.target_data: y, model.initial_state: state}
     40                 # Run a session using train_op
---> 41                 [train_loss], state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
     42             end = time.time()
     43             if (e + b) % args.print_every == 0:

/Users/damienhenry/code/jupyter/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
    338     try:
    339       result = self._run(None, fetches, feed_dict, options_ptr,
--> 340                          run_metadata_ptr)
    341       if run_metadata:
    342         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/Users/damienhenry/code/jupyter/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
    562     try:
    563       results = self._do_run(handle, target_list, unique_fetches,
--> 564                              feed_dict_string, options, run_metadata)
    565     finally:
    566       # The movers are no longer used. Delete them.

/Users/damienhenry/code/jupyter/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
    635     if handle is None:
    636       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
--> 637                            target_list, options, run_metadata)
    638     else:
    639       return self._do_call(_prun_fn, self._session, handle, feed_dict,

/Users/damienhenry/code/jupyter/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
    642   def _do_call(self, fn, *args):
    643     try:
--> 644       return fn(*args)
    645     except tf_session.StatusNotOK as e:
    646       error_message = compat.as_text(e.error_message)

/Users/damienhenry/code/jupyter/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
    626       else:
    627         return tf_session.TF_Run(
--> 628             session, None, feed_dict, fetch_list, target_list, None)
    629 
    630     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 

In [16]:
cost_optimisation = np.load('cost.npy')
plt.figure(figsize=(12,5))
plt.plot(range(len(cost_optimisation)), cost_optimisation, label='cost')
plt.legend()
plt.show()



In [17]:
tf.reset_default_graph()
args = Args()
model = Model(args, True)  # True to generate the model in sampling mode
with tf.Session() as sess:
    tf.initialize_all_variables().run()
    saver = tf.train.Saver(tf.all_variables())
    ckpt = tf.train.get_checkpoint_state(args.save_dir)
    print (ckpt)
    
    model.inspect(draw=True)


model_checkpoint_path: "save_face_training_0.5_128/model.ckpt-0"
all_model_checkpoint_paths: "save_face_training_0.5_128/model.ckpt-0"

('t', u'Wxh:0', (300, 16))
('t', u'Whh:0', (300, 300))
('t', u'Why:0', (16, 300))
('t', u'bh:0', (300, 1))
('t', u'by:0', (16, 1))
('nt', u'learning_rate:0', ())
('nt', u'beta1_power:0', ())
('nt', u'beta2_power:0', ())
('nt', u'Wxh/Adam:0', (300, 16))
('nt', u'Wxh/Adam_1:0', (300, 16))
('nt', u'Whh/Adam:0', (300, 300))
('nt', u'Whh/Adam_1:0', (300, 300))
('nt', u'Why/Adam:0', (16, 300))
('nt', u'Why/Adam_1:0', (16, 300))
('nt', u'bh/Adam:0', (300, 1))
('nt', u'bh/Adam_1:0', (300, 1))
('nt', u'by/Adam:0', (16, 1))
('nt', u'by/Adam_1:0', (16, 1))

sampling


In [18]:
def sample_with_temperature(a, temperature=1.0):
    # helper function to sample an index from a probability array
    a = np.log(a) / temperature
    a = np.exp(a) / np.sum(np.exp(a))
    return np.argmax(np.random.multinomial(1, a, 1))

In [19]:
def weighted_pick(weights):
    t = np.cumsum(weights)
    s = np.sum(weights)
    return(int(np.searchsorted(t, np.random.rand(1)*s)))

In [24]:
tf.reset_default_graph()
args = Args()
model = Model(args, infer=True)

seq_length = 1 
seq_count = args.picture_size*args.picture_size

temperature = 1

with tf.Session() as sess:
    tf.initialize_all_variables().run()
    print 'intialisation done'
    saver = tf.train.Saver(tf.all_variables())
    ckpt = tf.train.get_checkpoint_state(args.save_dir)
    print (ckpt)
    
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
               
    state = model.initial_state.eval()
    x = np.random.random([seq_length, args.vocab_size])
    #x = np.zeros([seq_length, args.vocab_size])
    #x[0][13]=1   
    pict = np.zeros([seq_count])
    for i in range(seq_count):
        feed = {model.input_data: x, model.initial_state: state}
        lines, state = sess.run([model.probs, model.final_state], feed)
        x = np.squeeze(lines,2)
        grey = weighted_pick(x[0])
        #grey = np.argmax(x[0])
        pict[i] = grey/16.
        x = np.zeros([seq_length, args.vocab_size])
        x[0][grey]=1
        if i%(args.picture_size*10) == 0:
            print i, '/', args.picture_size*args.picture_size
        
    picture = np.reshape(pict.transpose(), [args.picture_size, args.picture_size])
    plt.figure(figsize=(1,1))
    plt.figimage(picture)
    plt.show()


intialisation done
model_checkpoint_path: "save_face_training_0.5_128/model.ckpt-0"
all_model_checkpoint_paths: "save_face_training_0.5_128/model.ckpt-0"

0 / 4096
640 / 4096
1280 / 4096
1920 / 4096
2560 / 4096
3200 / 4096
3840 / 4096

In [22]:
from PIL import Image as Img
im = Img.fromarray((picture * 255).astype(np.uint8))
im.save("result.jpeg")

In [23]:
Image("result.jpeg")


Out[23]:

Feedback welcome @dh7net